Here’s the task: https://preppindata.blogspot.com/2021/01/2021-week-1.html. Essentially:

library(stringr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(googlesheets4)
library(lubridate)
library(plotly)
library(scales)
library(gt)
library(wesanderson)

Get data:

sheet <- "1GYv4573GnJa-C21NYeDj-OhFSTwrK0SnQNF2IQFqa50"
gs4_deauth()
data <- read_sheet(sheet)

data %>% head(10) %>% gt()
Order ID Customer Age Bike Value Existing Customer? Date Store - Bike
1 22 481 No 2021-04-25 York - Road
2 28 1825 No 2021-01-23 York - Road
3 51 1903 No 2021-07-03 York - Rood
4 59 1059 No 2021-01-24 York - Road
5 44 1764 Yes 2021-08-12 York - Mountain
6 16 967 Yes 2021-08-15 London - Mountain
7 35 1575 Yes 2021-03-13 London - Mountain
8 50 1074 No 2021-09-22 London - Mountain
9 37 1977 No 2021-02-09 London - Gravel
10 55 1352 No 2021-11-24 Leeds - Gravel

Tidy it:

tidy_data <- data %>%
  rename_with(~ str_remove(., "\\?|- ") %>% 
                str_replace_all(., " ", "_") %>% 
                str_to_lower(.)) %>%
  separate(store_bike,
           into = c("store", "bike")) %>%
  mutate(bike         = case_when(bike %in% c("Road", "Rood", "Rowd")       ~ "Road",
                                  bike %in% c("Mountain", "Mountaen")       ~ "Mountain",
                                  bike %in% c("Graval", "Gravel", "Gravle") ~ "Gravel"),
         date = as.Date(date)) %>%
  filter(!order_id %in% 1:10)

tidy_data %>% head(10) %>% gt()
order_id customer_age bike_value existing_customer date store bike
11 57 902 No 2021-10-04 Birmingham Road
12 31 946 Yes 2021-01-17 Leeds Road
13 17 1296 Yes 2021-10-25 Birmingham Road
14 59 1166 Yes 2021-07-18 Manchester Road
15 24 1781 No 2021-10-10 Manchester Mountain
16 59 1074 No 2021-10-06 York Mountain
17 57 1188 No 2021-09-14 York Mountain
18 56 544 No 2021-11-23 York Mountain
19 34 579 Yes 2021-11-24 York Gravel
20 17 1021 Yes 2021-06-24 York Gravel

Add the missing dates the the missing zeros …

complete_data <- tidy_data %>% select(date, bike, bike_value) %>%
  complete(date = seq.Date(min(date), max(date), by = "day"), 
           bike = bike) %>%
  replace_na(list(bike_value = 0))

complete_data %>% head(10) %>% gt()
date bike bike_value
2021-01-05 Gravel 0
2021-01-05 Mountain 0
2021-01-05 Road 2898
2021-01-06 Gravel 0
2021-01-06 Mountain 0
2021-01-06 Road 0
2021-01-07 Gravel 0
2021-01-07 Mountain 3450
2021-01-07 Road 0
2021-01-08 Gravel 1132

Summarise average sales per day of the month …

summarised_data <- complete_data %>%
  mutate(quarter      = paste0("Quarter ", quarter(date)),
         day_of_month = mday(date)) %>%
  group_by(quarter, day_of_month, bike) %>%
  summarise(average_sales_value = mean(bike_value)) %>%
  ungroup() %>%
  arrange(day_of_month) %>%
  group_by(quarter, bike) %>%
  mutate(typical_cumulative_monthly_sales = cumsum(average_sales_value)) %>%
  ungroup()
## `summarise()` regrouping output by 'quarter', 'day_of_month' (override with `.groups` argument)
summarised_data %>% arrange(quarter, bike, day_of_month) %>% head(10) %>% gt()
quarter day_of_month bike average_sales_value typical_cumulative_monthly_sales
Quarter 1 1 Gravel 994.0000 994.000
Quarter 1 2 Gravel 2461.4000 3455.400
Quarter 1 3 Gravel 661.6667 4117.067
Quarter 1 4 Gravel 556.5000 4673.567
Quarter 1 5 Gravel 0.0000 4673.567
Quarter 1 6 Gravel 601.2500 5274.817
Quarter 1 7 Gravel 1678.5000 6953.317
Quarter 1 8 Gravel 1705.0000 8658.317
Quarter 1 9 Gravel 568.0000 9226.317
Quarter 1 10 Gravel 499.6667 9725.983

Plot …

palette <- wes_palette("GrandBudapest1", n = 3)

plot <- ggplot(data = summarised_data, aes(x = day_of_month, y = typical_cumulative_monthly_sales)) +
          geom_line(aes(col = bike),
                    lwd = 1.2, alpha = 0.7) +
          scale_colour_manual(values = palette) +
          scale_y_continuous(labels = unit_format(unit = "K", scale = 1e-3)) +
          labs(title = "The profile of monthly bike sales",
               x     = "Day of the month",
               y     = "Average running total of sales (£)",
               col   = "Bike type") +
          facet_wrap(~ quarter, ncol = 1, strip.position = "left") + 
          theme_light() +
          theme(strip.text.y.left = element_text(angle = 0))

plot

Interactive plot (after a bit of plotly gymnastics) …

int_plot <- plot %>% ggplotly()
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
str(int_plot[["x"]][['layout']][['annotations']][[2]])
## List of 13
##  $ text          : chr "Average running total of sales (£)"
##  $ x             : num -0.0134
##  $ y             : num 0.5
##  $ showarrow     : logi FALSE
##  $ ax            : num 0
##  $ ay            : num 0
##  $ font          :List of 3
##   ..$ color : chr "rgba(0,0,0,1)"
##   ..$ family: chr ""
##   ..$ size  : num 14.6
##  $ xref          : chr "paper"
##  $ yref          : chr "paper"
##  $ textangle     : num -90
##  $ xanchor       : chr "right"
##  $ yanchor       : chr "center"
##  $ annotationType: chr "axis"
int_plot[["x"]][['layout']][['annotations']][[2]][["x"]] <- -0.05

int_plot